# NOTE(review): removes every visible (non dot-prefixed) object from the
# environment the script is run in -- the global workspace when run at top
# level. It does not detach packages or reset options(), so it is not a
# substitute for starting from a fresh R session.
rm(list=ls())
#' 9017 Online Panels Benchmarking Study SRM Paper
#' Analysis syntax
#'
#' @author  Dina Neiger
#' @version 20220112
#'
#'
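#' @input   
#' SPSS (.sav) file downloaded from the Australian Data Archive and stored in
#' the "Inputs/ADA download/" folder of the local working directory
#' 
#' 
#' 
#' @output
#' "Outputs/findata.RData", holding the data frame `findata` to be used in the analysis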

# Allow the Java VM a maximum heap of 6096 MB (-Xmx). rJava reads this option
# when the JVM is started, so it has to be set before library("xlsx") is
# attached below.
options(java.parameters = "-Xmx6096m")


library("plyr")
library("xlsx")
library(reshape2)
library("haven")

# Excel date and date-time cell formats used by the xlsx package.
# NOTE(review): set after library("xlsx") is attached, presumably so that any
# defaults the package installs on load are overridden -- confirm.
options(
  xlsx.date.format     = "dd/MM/yyyy",
  xlsx.datetime.format = "dd/MM/yyyy"
)

# Windows stub: drive root under which the project folder lives
Z_PATH <- "Z:/"

# Project working directory.
# setwd() returns the directory that was current *before* the call, so the
# path is defined explicitly here instead of being captured from setwd()'s
# return value (which only matched because setwd() was called twice in a row).
WRK_DIR <- paste0(Z_PATH, "Research Papers and Presentations/SRM article/Submission syntax")
setwd(WRK_DIR)

# Input and output folders (both end with a trailing slash so that file names
# can be appended with paste0())
INP_DIR <- paste0(WRK_DIR, "/Inputs/")
OUT_DIR <- paste0(WRK_DIR, "/Outputs/")

### read spss
datar <- read_sav(paste0(INP_DIR, "ADA download/au.edu.anu.ada.ddi.01329.sav"))


### assign survey labels
table(datar$surtype)

# Fetch the SPSS value labels by attribute name. Indexing attributes() by
# position (the fifth element) breaks as soon as the variable carries a
# different number of attributes.
surtype_values <- attr(datar$surtype, "labels", exact = TRUE)

# Short survey codes, assigned to the value labels by position.
# NOTE(review): assumes the labels are stored in the same order as these
# codes (survey 1 first, survey 8 last) -- confirm against the data file.
surtype_short <- c("1RDD", "2ABS", "3ANU", "4P1", "5P2", "6P3", "7P4", "8P5")

# Fail loudly if the labels are missing or do not line up one-to-one with the
# short codes, rather than recycling or mislabelling silently.
stopifnot(
  !is.null(surtype_values),
  length(surtype_values) == length(surtype_short)
)

# Lookup table: one row per survey type; row names are the long SPSS labels
surtype_label <- data.frame(labels = surtype_values)
surtype_label$value <- surtype_label$labels
surtype_label$longl <- row.names(surtype_label)
surtype_label$surtype.l <- surtype_short

# Attach the short code to every respondent (left join on the survey type)
findata <- merge(datar, surtype_label[, c("value", "surtype.l")],
                 by.x = "surtype", by.y = "value", all.x = TRUE)


### derive demographic variables (including additional ones where required)

# atsi: codes 2 and 9 are collapsed into the single code 29; the counts are
# tabulated before and after the recode.
# NOTE(review): presumably 29 is a deliberate combined "2 or 9" category -- confirm.
table(findata$atsi)
findata$atsi[findata$atsi %in% c(2, 9)] <- 29
table(findata$atsi)

# employment: 1 when any of d6, d7 or d8 equals 1, otherwise 0
# (%in% treats missing values as "not 1")
findata$employment <- as.numeric(
  findata$d6 %in% 1 | findata$d7 %in% 1 | findata$d8 %in% 1
)

# remoteness: copy of the column `remotene` under its full name
findata$remoteness <- findata$remotene



# Rebase the income variable (d16) to employed-only persons.

# Keep the responses as collected in d16_all before anything is blanked out
findata$d16_all <- findata$d16

# d16_base: 1 when d6 or d8 equals 1, otherwise 0
# NOTE(review): d7 counts towards `employment` (derived earlier in this
# script) but is not part of this base -- confirm that is intended.
findata$d16_base <- as.numeric(findata$d6 %in% 1 | findata$d8 %in% 1)
table(findata$d16_base)
table(findata$d16, findata$d16_base)

# Set d16 to missing for everyone outside the base
findata$d16[findata$d16_base == 0] <- NA

# Checks: rebased and original income against the base flag (NAs included),
# then missingness of the rebased variable overall and by employment
table(findata$d16, findata$d16_base, exclude = NULL)
table(findata$d16_all, findata$d16_base, exclude = NULL)
table(is.na(findata$d16))
table(is.na(findata$d16), findata$employment)


### check counts by survey (short survey codes)
table(findata$surtype.l)

### save analysis dataset: writes the object `findata` into the output folder
save(list = "findata", file = paste0(OUT_DIR, "findata.RData"))
